{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "NRMEai9kN2R7"
   },
   "source": [
    "### Set up dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "8o0GuNYnN4I6"
   },
   "outputs": [],
   "source": [
    "!pip install openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "rPXQ25hBN7-_"
   },
   "outputs": [],
   "source": [
    "!pip install tiktoken"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Go5O5Lt6N9Z1"
   },
   "outputs": [],
   "source": [
    "import openai\n",
    "import pandas as pd\n",
    "import math\n",
    "import time\n",
    "import numpy as np\n",
    "import tiktoken\n",
    "\n",
    "#### Import main package: gpt_annotate.py\n",
    "# Make sure that the .py file is in the same directory as the .ipynb file, or you provide the correct relative or absolute path to the .py file.\n",
    "import gpt_annotate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "s2SVKJPAOAWM"
   },
   "outputs": [],
   "source": [
    "# don't type the key in this file! \n",
    "# create gpt_api.txt, put the key in that, and save\n",
    "with open('gpt_api_key.txt', 'r') as f:\n",
    "    key = f.read().strip()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "xb5K0uyrOlUY"
   },
   "source": [
    "### Load in text_to_annotate and codebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Abeu-_5nOvLL"
   },
   "outputs": [],
   "source": [
    "# Load text to annotate\n",
    "text_to_annotate = pd.read_csv(\"text_to_classify.csv\")\n",
    "# Load codebook\n",
    "with open('codebook.txt', 'r') as file:\n",
    "    codebook = file.read()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "apc7cy-eO8tF"
   },
   "source": [
    "# Annotate your data!"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "a62wf8FzanCf"
   },
   "source": [
    "### If you have human labels you want to compare the GPT outputs against"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Dv7dc1mZPAGR"
   },
   "outputs": [],
   "source": [
    "# Prepare the data for annotation\n",
    "text_to_annotate = gpt_annotate.prepare_data(text_to_annotate, codebook, key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "WzM16wq3ai0D"
   },
   "outputs": [],
   "source": [
    "# Annotate the data (returns 4 outputs)\n",
    "gpt_out_all, gpt_out_final, performance, incorrect =  gpt_annotate.gpt_annotate(text_to_annotate, codebook, key)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "rd_dIEkxaseH"
   },
   "source": [
    "### If only using gpt_annotate for prediction (i.e., no human labels to compare performance)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "yK1hIfCOPX1v"
   },
   "outputs": [],
   "source": [
    "# Prepare the data for annotation\n",
    "text_to_annotate = gpt_annotate.prepare_data(text_to_annotate, codebook, key, human_labels = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "1h75bYJfbDPT"
   },
   "outputs": [],
   "source": [
    "# Annotate the data (returns 2 outputs)\n",
    "gpt_out_all, gpt_out_final =  gpt_annotate.gpt_annotate(text_to_annotate, codebook, key, human_labels = False)"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
